
# Module-level setup: project paths, logger, plotting style and the shared
# naming/colour conventions used by the functions below.
from init import PATHS
import logging
LOGGER = logging.getLogger(__name__)
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
# NOTE(review): the bare 'seaborn' style name was deprecated in matplotlib 3.6
# (renamed to 'seaborn-v0_8') -- confirm the pinned matplotlib still accepts it.
matplotlib.style.use('seaborn')
import seaborn as sns
# The seaborn theme is applied after the matplotlib style sheet above.
sns.set(style="white")
# seaborn 'colorblind' palette; not referenced by the functions visible in this file.
pal = sns.color_palette('colorblind')
from C_PatentVariables import conventions_names_colors
from E_Analysis import reading_datasets
# Conventions are loaded once, at import time. The plotting functions below read
# dict_var_colors[<variable>] with index 0 as the legend label and index 1 as the colour.
dict_subsector_shortnames, dict_var_colors, list_of_subsectors_in_cleancars = conventions_names_colors.main()



def main():
    """Run every export and plotting step of this module, in order.

    The run is bracketed by a begin and an end log message.
    """
    LOGGER.info('Begin c_othergraphs_suppliers.py')
    steps = (
        export_top10_newsuppliers,
        plot_activesuppliers_stocks_balanced,
        plot_activesupplierpatenting,
    )
    for step in steps:
        step()
    LOGGER.info('END c_othergraphs_suppliers.py')



def export_top10_newsuppliers():
    """Export LaTeX tables of the ten "new guard" suppliers ranked by battery patenting.

    The supplier firm-level panel is restricted to rows that are active, filed
    in 2003-2016, have ``oldguard == 0`` and are not flagged as auto sector
    (4-digit NAICS containing 3361, 3362 or 3363). For each remaining firm
    (``bvdid``) the function computes:

    * ``ShareNewGuysinBatPatenting``: the firm's share (in percent) of the
      summed ``Count_Bat_excl`` of that sub-panel;
    * the mean of ``Stock_Bat_excl`` and ``Stock`` over the firm's rows;
    * ``Nbr_oems``: the number of distinct ``OEM_Level1_ID`` among the firm's
      supply links with ``Year_start >= 2009``, and ``Share_links``: that
      count as a percentage of the number of retained link rows.

    Two files are written under ``PATHS.tables``: ``top10_newsuppliers.tex``
    (all regions) and ``top10_newsuppliers_fromtheUS.tex`` (``region == 'US'``).
    """
    suppliers = reading_datasets.read_supplier_firmlevel_panel()
    # We are assuming minimal double patenting by firms, as we sum across firms.
    filing_year = suppliers['earliest_filing_year']
    active_in_window = (suppliers['Active'] == 1) & (filing_year >= 2003) & (filing_year < 2017)
    # na=False makes "no NAICS code" an explicit non-match instead of relying on
    # how `&` happens to treat the NaN that str.contains returns for missing values.
    in_auto_sector = (
        suppliers['4digitNAICS'].str.contains('3361|3362|3363', regex=True, na=False)
        & active_in_window
    )
    is_new_guard = (suppliers['oldguard'] == 0) & (~in_auto_sector) & active_in_window
    # Filter once and reuse the sub-panel for every aggregate below.
    new_panel = suppliers[is_new_guard]

    # Battery-patent share and mean stocks per firm. No intermediate sorting is
    # done here: the ranking is established when the tables are written.
    newguys = new_panel[['name', 'bvdid', 'region']].drop_duplicates()
    total = new_panel['Count_Bat_excl'].sum()
    per_firm_counts = new_panel.groupby(['bvdid'])['Count_Bat_excl'].sum()
    newguys_share = 100 * per_firm_counts.rename('ShareNewGuysinBatPatenting') / total
    newguys = newguys.merge(newguys_share, on='bvdid', how='outer')
    newguys_mean = new_panel.groupby('bvdid')[['Stock_Bat_excl', 'Stock']].mean()
    newguys = newguys.merge(newguys_mean, on='bvdid', how='outer')

    # Number of links
    supplierids = pd.read_csv(PATHS.dropbox / 'Data_outputted/B_FactsetVariables/suppliers_ids.csv')
    newguy_bvdids = newguys['bvdid'].drop_duplicates().tolist()
    newguy_factset_ids = (
        supplierids.loc[supplierids['bvdid'].isin(newguy_bvdids), 'fctid']
        .drop_duplicates()
        .tolist()
    )
    links = pd.read_csv(PATHS.dropbox / 'Data_outputted/B_FactsetVariables/SC_ProdSuppCross.csv')
    links = links[links['SUPPLIER_ID'].isin(newguy_factset_ids) & (links['Year_start'] >= 2009)]
    nbr_oems = links.groupby('SUPPLIER_ID')['OEM_Level1_ID'].nunique().rename('Nbr_oems').reset_index()
    nbr_oems['Share_links'] = 100 * nbr_oems['Nbr_oems'] / links.shape[0]
    nbr_oems = nbr_oems.merge(supplierids, left_on='SUPPLIER_ID', right_on='fctid', how='left')
    # Left merge: firms without any retained link keep NaN in the link columns.
    newguys = newguys.merge(
        nbr_oems[['bvdid', 'Nbr_oems', 'Share_links']].drop_duplicates(),
        on='bvdid',
        how='left',
    )

    _write_top10_table(newguys, 'top10_newsuppliers.tex')
    _write_top10_table(newguys[newguys['region'] == 'US'], 'top10_newsuppliers_fromtheUS.tex')


def _write_top10_table(candidates, filename):
    """Write the ten rows of `candidates` with the largest battery share as a LaTeX table."""
    top10 = candidates.sort_values(by='ShareNewGuysinBatPatenting', ascending=False).iloc[:10, :]
    # Render before opening the file so a formatting error cannot leave an empty
    # .tex behind. The column spec is passed explicitly rather than patched into
    # the rendered string, and escape=True keeps '%' and '&' valid LaTeX whatever
    # the installed pandas default is.
    latex = format_table(top10).to_latex(index=False, column_format='rcccccc', escape=True)
    with open(PATHS.tables / filename, "w") as f:
        f.write(latex)




def format_table(newguys):
    """Return a presentation-ready copy of a new-suppliers table.

    The ``bvdid`` column is dropped, the two share columns (expressed in
    percent) are rendered as strings with two decimals and a ``%`` sign, the
    stock and OEM-count columns are truncated to integers, and the columns
    are renamed to their display headers. The input frame is not modified.

    Missing ``Nbr_oems`` / ``Share_links`` values are treated as 0: the caller
    attaches these columns with a left merge, so a supplier without any
    retained link has NaN there, which would otherwise make the integer cast
    raise.
    """
    def as_percent(value):
        # Input is already in percent; '{:.2%}' multiplies by 100 again.
        return '{:.2%}'.format(value / 100)

    table = newguys.drop(columns=['bvdid'])

    link_columns = ['Nbr_oems', 'Share_links']
    table[link_columns] = table[link_columns].fillna(0)

    for column in ('ShareNewGuysinBatPatenting', 'Share_links'):
        table[column] = table[column].apply(as_percent)
    for column in ('Stock_Bat_excl', 'Stock', 'Nbr_oems'):
        table[column] = table[column].astype(int)

    display_names = {
        'name': 'Name',
        'region': 'Region',
        'ShareNewGuysinBatPatenting': 'Battery Patent Concentration',
        'Stock_Bat_excl': 'Battery Stock',
        'Stock': 'Overall Stock',
        'Nbr_oems': 'Nbr OEMs',
        'Share_links': '% New Links',
    }
    return table.rename(columns=display_names)


def plot_activesuppliers_stocks_balanced():
    """Plot yearly mean battery and fuel-cell stocks of new vs pre-existing suppliers.

    Uses the balanced OEM-supplier link dataset restricted to active links.
    Rows with ``Years_since_firstactive == 0`` are the "new" suppliers, rows
    with a positive value the "pre-existing" ones. Solid lines show new
    suppliers and dashed lines pre-existing ones. The figure is saved as PNG
    and PDF under ``PATHS.figures / 'supplier_spillovers'``.
    """
    active_links = reading_datasets.get_oem_supplier_activelinks_with_supplierstocks(balanced=True)
    # Only active links are relevant here.
    active_links = active_links[active_links['Active']]
    years_active = active_links['Years_since_firstactive']
    new_suppliers = active_links[years_active == 0]
    preexisting_suppliers = active_links[years_active > 0]

    # One entry per curve, in drawing order:
    # (technology suffix, sample, legend label, colour key, extra line options).
    curve_specs = (
        ('Bat', new_suppliers, 'Battery Stock, New Suppliers', 'Count_Bat', {}),
        ('Bat', preexisting_suppliers, 'Battery Stock, Pre-existing Suppliers', 'Count_Bat', {'linestyle': '--'}),
        ('FC', new_suppliers, 'Fuel Cell Stock, New Suppliers', 'Count_FC', {}),
        ('FC', preexisting_suppliers, 'Fuel Cell Stock, Pre-existing Suppliers', 'Count_FC', {'linestyle': '--'}),
    )
    # All yearly means are computed before the figure is created.
    curves = [
        (sample.groupby('Year')[f'Stock_{tech}'].mean(), label, dict_var_colors[color_key][1], line_options)
        for tech, sample, label, color_key, line_options in curve_specs
    ]

    fig, ax = plt.subplots()
    for yearly_mean, label, color, line_options in curves:
        ax.plot(yearly_mean, label=label, color=color, linewidth=3, **line_options)
    ax.legend(ncol=1)
    ax.set_xlabel('Year')
    ax.set_ylabel('Mean Patent Stock of Active Suppliers')

    output_dir = PATHS.figures / 'supplier_spillovers'
    fig.savefig(output_dir / 'activesuppliers_stocks_new_vs_existing_balanced.png', bbox_inches='tight')
    fig.savefig(output_dir / 'activesuppliers_stocks_new_vs_existing_balanced.pdf', bbox_inches='tight')
    plt.close(fig)




def plot_activesupplierpatenting():
    """Plot yearly patent-family counts of active suppliers by technology group.

    The supplier firm-level panel is restricted to active rows with
    ``earliest_filing_year`` in [1990, 2016) and summed per filing year. Four
    curves are drawn: fuel cells, batteries, other clean-car technologies
    (EV + H2 + Stor + HV) and ICE plus efficient ICE. Legend labels and
    colours come from ``dict_var_colors``. The figure is saved as PNG and PDF
    under ``PATHS.figures / 'supplier_spillovers'``.
    """
    dfsupplieryear = reading_datasets.read_supplier_firmlevel_panel()
    dfsupplieryear = dfsupplieryear[dfsupplieryear['Active'] == 1]
    beg_year, end_year = 1990, 2016
    filing_year = dfsupplieryear['earliest_filing_year']
    in_window = (filing_year >= beg_year) & (filing_year < end_year)

    # Sum only the count columns that are used below. Summing the whole panel
    # also aggregates identifier/text columns, which recent pandas versions no
    # longer drop silently.
    count_columns = [
        'Count_FC_excl', 'Count_Bat_excl',
        'Count_EV_excl', 'Count_H2_excl', 'Count_Stor_excl', 'Count_HV_excl',
        'Count_ICE_excl', 'Count_EffICE_excl',
    ]
    data = dfsupplieryear[in_window].groupby('earliest_filing_year')[count_columns].sum()
    # We are assuming minimal double patenting by firms, as we sum across firms.
    data['Count_OtherCleanCar'] = (
        data['Count_EV_excl'] + data['Count_H2_excl'] + data['Count_Stor_excl'] + data['Count_HV_excl']
    )
    data['Count_ICE_andEffICE_excl'] = data['Count_ICE_excl'] + data['Count_EffICE_excl']

    # Draw on a dedicated figure rather than on whatever pyplot figure happens
    # to be current when this function is called.
    fig, ax = plt.subplots()
    curves = (
        ('Count_FC_excl', 3),
        ('Count_Bat_excl', 3),
        ('Count_OtherCleanCar', 2),
        ('Count_ICE_andEffICE_excl', 2),
    )
    for column, linewidth in curves:
        label, color = dict_var_colors[column][0], dict_var_colors[column][1]
        ax.plot(data[column], label=label, color=color, linewidth=linewidth)
    ax.legend(ncol=1)
    ax.set_xlabel('Year')
    ax.set_ylabel('Number of Patent Families')
    # range() needs plain integers; the year index may be a float or NumPy scalar.
    first_tick = int(max(beg_year, data.index.min()))
    ax.set_xticks(list(range(first_tick, end_year, 2)))

    output_dir = PATHS.figures / 'supplier_spillovers'
    fig.savefig(output_dir / 'activesupplierspatenting.png', bbox_inches='tight')
    fig.savefig(output_dir / 'activesupplierspatenting.pdf', bbox_inches='tight')
    plt.close(fig)

